Predicting Pathogen from RNAseq data

Create the model using hyperparameter tuning


In [1]:
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from xgboost import XGBClassifier
import pandas as pd
import numpy as np
import xgboost as xgb
import matplotlib
from matplotlib import pyplot
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

patient_groups=["control", "viral", "bacterial", "fungal"]
group_id = lambda name: patient_groups.index(name)

X = pd.DataFrame.from_csv("combineSV_WTcpmtable_v2.txt", sep="\s+").T
y = [group_id("bacterial")] * 29 \
    + [group_id("viral")] * 42 \
    + [group_id("fungal")] * 10 \
    + [group_id("control")] * 61
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

print "Training set has %d samples. Testing set has %d samples." % (len(X_train), len(X_test))

model = XGBClassifier(objective="multi:softprob")
# Number of boosted trees to fit.
n_estimators = range(1, 11, 2)
# Maximum tree depth for base learners.
max_depth = range(2, 11, 2)
param_grid = dict(max_depth=max_depth, n_estimators=n_estimators)
kfold = StratifiedKFold(n_splits=2, shuffle=True, random_state=7)
grid_search = GridSearchCV(model, param_grid, scoring="neg_log_loss", 
                           n_jobs=1, cv=kfold, verbose=1)
grid_result = grid_search.fit(X_train, y_train)

# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

# plot results
%matplotlib inline

scores = np.array(means).reshape(len(max_depth), len(n_estimators))
for i, value in enumerate(max_depth):
    pyplot.plot(n_estimators, scores[i], label='depth: ' + str(value))
pyplot.legend()
pyplot.xlabel('n_estimators')
pyplot.ylabel('Log Loss')


Training set has 99 samples. Testing set has 43 samples.
Fitting 2 folds for each of 25 candidates, totalling 50 fits
[Parallel(n_jobs=1)]: Done  50 out of  50 | elapsed:   46.4s finished
Best: -1.313247 using {'n_estimators': 7, 'max_depth': 2}
-1.359561 (0.002301) with: {'n_estimators': 1, 'max_depth': 2}
-1.332849 (0.012278) with: {'n_estimators': 3, 'max_depth': 2}
-1.326635 (0.017019) with: {'n_estimators': 5, 'max_depth': 2}
-1.313247 (0.017527) with: {'n_estimators': 7, 'max_depth': 2}
-1.321541 (0.022684) with: {'n_estimators': 9, 'max_depth': 2}
-1.360918 (0.000564) with: {'n_estimators': 1, 'max_depth': 4}
-1.336042 (0.008165) with: {'n_estimators': 3, 'max_depth': 4}
-1.337598 (0.027781) with: {'n_estimators': 5, 'max_depth': 4}
-1.338183 (0.034502) with: {'n_estimators': 7, 'max_depth': 4}
-1.339095 (0.041565) with: {'n_estimators': 9, 'max_depth': 4}
-1.360918 (0.000564) with: {'n_estimators': 1, 'max_depth': 6}
-1.336042 (0.008165) with: {'n_estimators': 3, 'max_depth': 6}
-1.337598 (0.027781) with: {'n_estimators': 5, 'max_depth': 6}
-1.338183 (0.034502) with: {'n_estimators': 7, 'max_depth': 6}
-1.339095 (0.041565) with: {'n_estimators': 9, 'max_depth': 6}
-1.360918 (0.000564) with: {'n_estimators': 1, 'max_depth': 8}
-1.336042 (0.008165) with: {'n_estimators': 3, 'max_depth': 8}
-1.337598 (0.027781) with: {'n_estimators': 5, 'max_depth': 8}
-1.338183 (0.034502) with: {'n_estimators': 7, 'max_depth': 8}
-1.339095 (0.041565) with: {'n_estimators': 9, 'max_depth': 8}
-1.360918 (0.000564) with: {'n_estimators': 1, 'max_depth': 10}
-1.336042 (0.008165) with: {'n_estimators': 3, 'max_depth': 10}
-1.337598 (0.027781) with: {'n_estimators': 5, 'max_depth': 10}
-1.338183 (0.034502) with: {'n_estimators': 7, 'max_depth': 10}
-1.339095 (0.041565) with: {'n_estimators': 9, 'max_depth': 10}
Out[1]:
<matplotlib.text.Text at 0x111167350>

In [2]:
%matplotlib inline
from learning_curves import plot_learning_curve
from sklearn.model_selection import ShuffleSplit
import matplotlib.pyplot as plt

best_estimator = XGBClassifier(**grid_search.best_params_)

title = "Learning Curves for xgboost"
cv = ShuffleSplit(n_splits=2, test_size=0.3, random_state=0)
plot_learning_curve(best_estimator, title, X, y, (0.2, 1.01), cv=cv, n_jobs=1)
plt.show()


None

Make predictions based on the model


In [3]:
# make predictions
predicted = grid_result.predict(X_test)

print "Accuracy was %.2f%%\n" % (100 * accuracy_score(y_test, predicted))
print classification_report(y_test, predicted, target_names=patient_groups)
cm = confusion_matrix(y_test, predicted, labels=range(len(patient_groups)))
print "Confusion Matrix: rows = predictions, columns = actual\n"
row_format ="{:>15}" * (len(patient_groups)+1)
print row_format.format("", *patient_groups)
for disease, row in zip(patient_groups, cm):
    print row_format.format(disease, *row)


Accuracy was 51.16%

             precision    recall  f1-score   support

    control       0.64      0.74      0.68        19
      viral       0.30      0.27      0.29        11
  bacterial       0.56      0.50      0.53        10
     fungal       0.00      0.00      0.00         3

avg / total       0.49      0.51      0.50        43

Confusion Matrix: rows = predictions, columns = actual

                       control          viral      bacterial         fungal
        control             14              2              3              0
          viral              6              3              1              1
      bacterial              1              3              5              1
         fungal              1              2              0              0

Review patients the model misclassified


In [4]:
probs = np.array(grid_result.predict_proba(X_test))
probs *= 100

d = {"Probability Control": probs[:, patient_groups.index("control")],
     "Probability Viral": probs[:, patient_groups.index("viral")],
     "Probability Bacterial": probs[:, patient_groups.index("bacterial")],
     "Predicted Class": [patient_groups[i] for i in grid_result.predict(X_test)],
     "Actual Class": [patient_groups[i] for i in y_test]}

patient_df = pd.DataFrame(d, index=X_test.index)
patient_df[patient_df["Predicted Class"] != patient_df["Actual Class"]]


Out[4]:
Actual Class Predicted Class Probability Bacterial Probability Control Probability Viral
MNC.014 viral control 23.392109 30.087852 24.744411
MNC.674 viral control 22.655106 34.055008 27.102434
MNC.098 viral control 14.892682 37.064793 33.956825
MN_223 bacterial control 22.889997 29.844179 25.616163
MNC.292 control bacterial 29.759371 23.411911 27.948860
MNC.131 control bacterial 31.754002 25.190596 24.783957
MN_326 control viral 20.577374 24.134228 35.826050
MNC.452.x bacterial viral 19.016665 29.888338 33.108788
MNC.412 viral fungal 13.519282 23.623735 22.788357
MN_368 control bacterial 34.392048 24.539768 21.278799
MNC.234 viral bacterial 27.746531 24.541880 25.473700
MN_201 fungal viral 28.813448 20.982773 31.147867
MNC.013 viral control 15.282658 52.321552 17.941254
MN_137 bacterial viral 20.282084 29.400450 31.134409
MNC.557 bacterial viral 19.837709 25.516045 36.179394
MN_226 fungal viral 24.359613 23.843870 34.391167
MN_283 bacterial fungal 29.151695 20.800621 19.847288
MNC.012.y viral control 28.730232 45.267612 13.496365
MNC.091 viral control 16.422787 42.907681 23.612793
MN_305 fungal control 25.740545 31.418831 27.724262
MNC.294 control viral 18.772388 28.390049 35.082394

In [5]:
from IPython.core.display import display, HTML

colname = "Importance"
df = pd.DataFrame(grid_result.best_estimator_.booster().get_score(importance_type="weight"),
                  index=[colname]).T
df = df[df[colname] > 0]
df = df.sort_values(by=[colname], ascending=False)

s="""
<h2>List of genes by importance</h2>
<p>Note: the NCBI link will open the target in a new window or tab.</p>
<table>
<tr><th>Gene</th><th>Importance</th><th>NCBI</th></tr>
"""

ncbi_url = lambda gene: "https://www.ncbi.nlm.nih.gov/gene/?term=%s[Gene Name] AND Human[Organism]" % (gene)

for index, row in df.iterrows():
    s += """
    <tr><td>%s</td><td>%d</td>
    <td><a target=\"_blank\" href=\"%s\">NCBI Search</a></td></tr>
    """ %(index, row[0], ncbi_url(index))
s += "</table>"

display(HTML(s))


List of genes by importance

Note: the NCBI link will open the target in a new window or tab.

GeneImportanceNCBI
RHCE7 NCBI Search
SDC44 NCBI Search
PTGES34 NCBI Search
PABPC14 NCBI Search
MOBP4 NCBI Search
BAZ1B4 NCBI Search
CAMK14 NCBI Search
LINC012474 NCBI Search
ABAT3 NCBI Search
CARD113 NCBI Search
DSCAM3 NCBI Search
A1CF2 NCBI Search
PRF12 NCBI Search
MUM12 NCBI Search
LOC1019295672 NCBI Search
WDR62 NCBI Search
EEF1A12 NCBI Search
ATP11B2 NCBI Search
EIF3L1 NCBI Search
ACVR11 NCBI Search
TTL1 NCBI Search
SOX51 NCBI Search
ABCA51 NCBI Search
RPSA1 NCBI Search
RPL341 NCBI Search
ABCA81 NCBI Search
RBP51 NCBI Search
ABI3BP1 NCBI Search
ACP11 NCBI Search
ADHFE11 NCBI Search
FAM206A1 NCBI Search
AHRR1 NCBI Search
MAP3K81 NCBI Search
MAP3K131 NCBI Search
LUZP11 NCBI Search
DNAJB121 NCBI Search
LINC005751 NCBI Search
JADE21 NCBI Search
G3BP21 NCBI Search
KLF131 NCBI Search

In [6]:
# Dump booster info
print grid_result.best_estimator_.base_score
print "\n".join(grid_result.best_estimator_.booster().get_dump())


0.5
0:[PABPC1<2108.71] yes=1,no=2,missing=1
	1:[PTGES3<90.9317] yes=3,no=4,missing=3
		3:leaf=-0.0248649
		4:leaf=0.115789
	2:[MOBP<129.057] yes=5,no=6,missing=5
		5:leaf=0.164045
		6:leaf=-0.0352941

0:[CARD11<115.479] yes=1,no=2,missing=1
	1:[RPSA<3.21717] yes=3,no=4,missing=3
		3:leaf=0.0513274
		4:leaf=-0.0434783
	2:[ABCA5<20.3476] yes=5,no=6,missing=5
		5:leaf=0.157895
		6:leaf=0.0117647

0:[LINC01247<0.844941] yes=1,no=2,missing=1
	1:[LOC101929567<349.976] yes=3,no=4,missing=3
		3:leaf=-0.0540773
		4:leaf=0.0956522
	2:[CAMK1<10.0913] yes=5,no=6,missing=5
		5:leaf=0.163636
		6:leaf=-0.0482759

0:[RHCE<37.9593] yes=1,no=2,missing=1
	1:[ACP1<248.937] yes=3,no=4,missing=3
		3:leaf=-0.0647273
		4:leaf=0.0588235
	2:[A1CF<30.3216] yes=5,no=6,missing=5
		5:leaf=0.12
		6:leaf=0.0117647

0:[PABPC1<2108.71] yes=1,no=2,missing=1
	1:[PTGES3<90.9317] yes=3,no=4,missing=3
		3:leaf=-0.0240633
		4:leaf=0.103782
	2:[MOBP<129.057] yes=5,no=6,missing=5
		5:leaf=0.140908
		6:leaf=-0.0348951

0:[SDC4<5.7941] yes=1,no=2,missing=1
	1:[CARD11<75.8036] yes=3,no=4,missing=3
		3:leaf=0.0100998
		4:leaf=0.139593
	2:leaf=-0.0596555

0:[LINC01247<0.844941] yes=1,no=2,missing=1
	1:[WDR6<337.282] yes=3,no=4,missing=3
		3:leaf=-0.0527528
		4:leaf=0.0922695
	2:[CAMK1<10.0913] yes=5,no=6,missing=5
		5:leaf=0.143667
		6:leaf=-0.047482

0:[RHCE<37.9593] yes=1,no=2,missing=1
	1:[LINC00575<94.8312] yes=3,no=4,missing=3
		3:leaf=-0.0634419
		4:leaf=0.0582775
	2:[ABAT<49.9433] yes=5,no=6,missing=5
		5:leaf=0.113719
		6:leaf=0.0103783

0:[PABPC1<2108.71] yes=1,no=2,missing=1
	1:[PTGES3<90.9317] yes=3,no=4,missing=3
		3:leaf=-0.0233558
		4:leaf=0.0940393
	2:[MOBP<129.057] yes=5,no=6,missing=5
		5:leaf=0.124608
		6:leaf=-0.0345373

0:[SDC4<5.7941] yes=1,no=2,missing=1
	1:[CARD11<75.8036] yes=3,no=4,missing=3
		3:leaf=0.00911136
		4:leaf=0.124169
	2:leaf=-0.0584118

0:[LINC01247<0.844941] yes=1,no=2,missing=1
	1:[WDR6<337.282] yes=3,no=4,missing=3
		3:leaf=-0.0513965
		4:leaf=0.08575
	2:[CAMK1<10.0913] yes=5,no=6,missing=5
		5:leaf=0.128376
		6:leaf=-0.0465911

0:[RHCE<37.9593] yes=1,no=2,missing=1
	1:[ADHFE1<111.115] yes=3,no=4,missing=3
		3:leaf=-0.0622423
		4:leaf=0.0565825
	2:[ACVR1<48.1243] yes=5,no=6,missing=5
		5:leaf=0.106738
		6:leaf=0.00958404

0:[PABPC1<2108.71] yes=1,no=2,missing=1
	1:[RPL34<68.1889] yes=3,no=4,missing=3
		3:leaf=-0.028314
		4:leaf=0.0712867
	2:[MOBP<129.057] yes=5,no=6,missing=5
		5:leaf=0.112101
		6:leaf=-0.0343725

0:[SDC4<5.7941] yes=1,no=2,missing=1
	1:[MUM1<27.7506] yes=3,no=4,missing=3
		3:leaf=0.0582352
		4:leaf=-0.055976
	2:leaf=-0.0572051

0:[LINC01247<0.844941] yes=1,no=2,missing=1
	1:[LOC101929567<349.976] yes=3,no=4,missing=3
		3:leaf=-0.0501281
		4:leaf=0.0805816
	2:[CAMK1<10.0913] yes=5,no=6,missing=5
		5:leaf=0.116117
		6:leaf=-0.0456158

0:[RHCE<37.9593] yes=1,no=2,missing=1
	1:[FAM206A<119.303] yes=3,no=4,missing=3
		3:leaf=-0.06116
		4:leaf=0.0554058
	2:[AHRR<53.1435] yes=5,no=6,missing=5
		5:leaf=0.100779
		6:leaf=0.00868072

0:[EEF1A1<5251.18] yes=1,no=2,missing=1
	1:[PTGES3<78.3124] yes=3,no=4,missing=3
		3:leaf=-0.0362949
		4:leaf=0.0942908
	2:[PRF1<34.6688] yes=5,no=6,missing=5
		5:leaf=0.115423
		6:leaf=-0.0203355

0:[JADE2<233.487] yes=1,no=2,missing=1
	1:[MAP3K13<174.184] yes=3,no=4,missing=3
		3:leaf=-0.0341893
		4:leaf=0.057816
	2:[ABCA8<7.8292] yes=5,no=6,missing=5
		5:leaf=0.122936
		6:leaf=-0.0378523

0:[DSCAM<6.96523] yes=1,no=2,missing=1
	1:[ATP11B<470.176] yes=3,no=4,missing=3
		3:leaf=-0.0560397
		4:leaf=0.0632467
	2:[BAZ1B<102.433] yes=5,no=6,missing=5
		5:leaf=0.11236
		6:leaf=-0.0508661

0:[RHCE<37.9593] yes=1,no=2,missing=1
	1:[MAP3K8<342.627] yes=3,no=4,missing=3
		3:leaf=-0.0601231
		4:leaf=0.0542153
	2:[ABAT<49.9433] yes=5,no=6,missing=5
		5:leaf=0.0955389
		6:leaf=0.00780039

0:[EIF3L<182.278] yes=1,no=2,missing=1
	1:[LUZP1<122.993] yes=3,no=4,missing=3
		3:leaf=-0.0345764
		4:leaf=0.0714058
	2:[SOX5<17.5] yes=5,no=6,missing=5
		5:leaf=0.101701
		6:leaf=-0.0322464

0:[SDC4<5.7941] yes=1,no=2,missing=1
	1:[MUM1<27.7506] yes=3,no=4,missing=3
		3:leaf=0.0497779
		4:leaf=-0.0539235
	2:leaf=-0.055282

0:[DSCAM<6.96523] yes=1,no=2,missing=1
	1:[TTL<144.481] yes=3,no=4,missing=3
		3:leaf=-0.0550153
		4:leaf=0.062154
	2:[BAZ1B<102.433] yes=5,no=6,missing=5
		5:leaf=0.103543
		6:leaf=-0.0496796

0:[RHCE<37.9593] yes=1,no=2,missing=1
	1:[RBP5<52.6298] yes=3,no=4,missing=3
		3:leaf=-0.0591756
		4:leaf=0.052677
	2:[A1CF<30.3216] yes=5,no=6,missing=5
		5:leaf=0.0903729
		6:leaf=0.00694476

0:[EEF1A1<5251.18] yes=1,no=2,missing=1
	1:[BAZ1B<112.009] yes=3,no=4,missing=3
		3:leaf=-0.0425257
		4:leaf=0.0633252
	2:[PRF1<34.6688] yes=5,no=6,missing=5
		5:leaf=0.0984699
		6:leaf=-0.0201828

0:[G3BP2<323.801] yes=1,no=2,missing=1
	1:[KLF13<11.0899] yes=3,no=4,missing=3
		3:leaf=0.0366868
		4:leaf=-0.0446938
	2:[ABI3BP<362.729] yes=5,no=6,missing=5
		5:leaf=0.107516
		6:leaf=0.00433634

0:[DSCAM<6.96523] yes=1,no=2,missing=1
	1:[ATP11B<470.176] yes=3,no=4,missing=3
		3:leaf=-0.0540243
		4:leaf=0.0592059
	2:[BAZ1B<102.433] yes=5,no=6,missing=5
		5:leaf=0.0955973
		6:leaf=-0.0486365

0:[RHCE<37.9593] yes=1,no=2,missing=1
	1:[DNAJB12<136.354] yes=3,no=4,missing=3
		3:leaf=-0.0582741
		4:leaf=0.0512088
	2:[ABAT<49.9433] yes=5,no=6,missing=5
		5:leaf=0.0858952
		6:leaf=0.00657047


In [7]:
idf = X_test.loc[:, df.index.tolist()].copy()
idf["group"] = [patient_groups[idx] for idx in y_test]
idf


Out[7]:
RHCE SDC4 PTGES3 PABPC1 MOBP BAZ1B CAMK1 LINC01247 ABAT CARD11 ... AHRR MAP3K8 MAP3K13 LUZP1 DNAJB12 LINC00575 JADE2 G3BP2 KLF13 group
MNC.014 30.634908 56.163999 51.058181 546.322535 56.163999 142.962906 45.952363 0.000000 71.481453 132.751270 ... 71.481453 71.481453 148.068724 142.962906 0.000000 0.000000 265.502540 25.529090 86.798907 viral
MNC.674 0.000000 0.000000 0.000000 1428.182066 196.238757 261.651676 0.000000 0.000000 0.000000 0.000000 ... 0.000000 0.000000 141.727991 0.000000 0.000000 0.000000 0.000000 152.630144 0.000000 viral
MN_363 14.414103 14.414103 0.000000 79.277565 100.898719 50.449360 64.863462 57.656411 79.277565 108.105771 ... 100.898719 28.828206 317.110261 122.519873 7.207051 14.414103 79.277565 93.691668 100.898719 bacterial
MNC.098 14.078360 0.000000 211.175402 1956.892061 0.000000 0.000000 0.000000 0.000000 140.783602 126.705241 ... 0.000000 0.000000 126.705241 225.253762 0.000000 0.000000 84.470161 0.000000 126.705241 viral
MNC.112 10.712182 60.702362 32.136545 1799.646498 82.126725 114.263270 0.000000 10.712182 135.687633 28.565817 ... 53.560908 35.707272 174.965632 110.692543 67.843816 21.424363 53.560908 189.248540 89.268179 bacterial
MN_223 0.000000 19.775469 17.797922 1210.258722 0.000000 148.316020 1.977547 0.000000 21.753016 5.932641 ... 0.000000 288.721852 543.825406 35.595845 15.820375 0.000000 0.000000 96.899800 110.742628 bacterial
MNC.292 0.000000 0.000000 0.000000 1231.890890 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 ... 0.000000 0.000000 94.277364 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 control
MNC.691 0.000000 0.000000 640.949201 1043.405676 0.000000 462.079657 298.115907 0.000000 0.000000 0.000000 ... 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 119.246363 control
MN_386 0.000000 0.000000 471.343772 736.474643 88.376957 206.212900 132.565436 0.000000 0.000000 0.000000 ... 0.000000 0.000000 73.647464 0.000000 0.000000 0.000000 0.000000 0.000000 132.565436 control
MNC.151 0.000000 0.000000 0.000000 2303.907381 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 ... 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 control
MNC.114 0.000000 80.468865 93.880343 2802.998806 0.000000 80.468865 0.000000 0.000000 254.818073 0.000000 ... 0.000000 0.000000 93.880343 0.000000 0.000000 0.000000 80.468865 0.000000 590.105012 control
MNC.555 0.000000 0.000000 25.950098 3775.739253 0.000000 311.401176 0.000000 0.000000 0.000000 155.700588 ... 0.000000 116.775441 142.725539 64.875245 129.750490 0.000000 51.900196 142.725539 220.575833 control
MN_224 24.289826 18.217370 18.217370 85.014392 60.724566 48.579652 12.144913 6.072457 54.652109 24.289826 ... 36.434739 48.579652 115.376674 42.507196 18.217370 12.144913 206.463523 72.869479 0.000000 bacterial
MNC.131 0.000000 63.443725 19.033118 139.576196 145.920568 44.410608 82.476843 82.476843 38.066235 120.543078 ... 19.033118 12.688745 260.119274 145.920568 69.788098 0.000000 31.721863 0.000000 6.344373 control
MNC.676 0.000000 0.000000 0.000000 4000.111114 0.000000 527.792439 0.000000 0.000000 0.000000 0.000000 ... 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 208.339121 0.000000 control
MN_171 0.000000 0.000000 0.000000 532.362304 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 ... 0.000000 0.000000 0.000000 1160.549824 0.000000 0.000000 0.000000 0.000000 819.837949 viral
MN_326 0.000000 0.000000 0.000000 0.000000 0.000000 16.296996 0.000000 0.000000 0.000000 0.000000 ... 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 control
MN_366 0.000000 226.900290 54.023879 4393.942122 0.000000 857.178873 0.000000 0.000000 18.007960 0.000000 ... 50.422287 39.617511 277.322577 169.274819 54.023879 0.000000 140.462084 226.900290 234.103474 control
MNC.454 0.000000 0.000000 0.000000 1563.166122 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 ... 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 620.098957 0.000000 2454.558373 control
MNC.452.x 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 ... 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 bacterial
MNC.412 221.307930 0.000000 0.000000 2021.949723 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 ... 0.000000 0.000000 0.000000 291.724089 40.237805 0.000000 50.297257 734.339949 0.000000 viral
MN_368 0.000000 11.914336 23.828672 399.130253 679.117148 23.828672 83.400351 5.957168 59.571680 5.957168 ... 95.314687 101.271855 256.158222 29.785840 95.314687 0.000000 238.286718 119.143359 29.785840 control
MNC.234 0.000000 93.245310 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 223.788743 0.000000 ... 0.000000 0.000000 0.000000 18.649062 0.000000 0.000000 0.000000 0.000000 0.000000 viral
MNC.473 0.000000 31.125619 27.234917 953.222085 54.469833 81.704750 167.300203 0.000000 23.344214 19.453512 ... 0.000000 38.907024 120.611774 237.332846 50.579131 0.000000 0.000000 105.048964 159.518798 bacterial
MNC.331 0.000000 0.000000 978.430064 2479.430732 744.941072 0.000000 0.000000 0.000000 389.148321 0.000000 ... 0.000000 0.000000 567.044696 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 viral
MN_385 0.000000 19.088705 19.088705 515.395041 1985.225342 146.346740 0.000000 25.451607 133.620936 12.725803 ... 12.725803 31.814509 95.443526 50.903214 76.354821 25.451607 25.451607 120.895133 38.177410 bacterial
MN_201 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 ... 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 fungal
MNC.276 0.000000 59.978408 19.992803 2199.208285 19.992803 59.978408 19.992803 19.992803 199.928026 259.906434 ... 99.964013 0.000000 99.964013 99.964013 0.000000 79.971210 159.942421 139.949618 279.899236 control
MNC.013 0.000000 0.000000 118.512106 2282.646221 0.000000 108.206706 5.152700 0.000000 0.000000 41.221602 ... 0.000000 0.000000 15.458101 170.039109 15.458101 0.000000 139.122907 273.093114 334.925518 viral
MNC.554 0.000000 2.925773 99.476287 1559.437081 0.000000 90.698967 0.000000 0.000000 0.000000 0.000000 ... 0.000000 11.703093 122.882472 5.851546 17.554639 0.000000 40.960824 175.546388 134.585564 control
MN_137 0.000000 0.000000 0.000000 405.323245 0.000000 135.107748 67.553874 0.000000 67.553874 0.000000 ... 67.553874 202.661623 337.769371 270.215497 0.000000 0.000000 0.000000 0.000000 0.000000 bacterial
MNC.557 0.000000 0.000000 0.000000 852.636067 0.000000 900.004737 15.789557 0.000000 110.526898 15.789557 ... 0.000000 110.526898 0.000000 0.000000 0.000000 0.000000 236.843352 315.791136 0.000000 bacterial
MN_226 0.000000 0.000000 0.000000 0.000000 267.892101 0.000000 0.000000 0.000000 0.000000 0.000000 ... 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1174.603829 556.391287 fungal
MN_283 43.127243 43.127243 34.501794 129.381728 120.756279 77.629037 34.501794 43.127243 60.378140 112.130831 ... 138.007176 8.625449 215.636213 69.003588 34.501794 17.250897 69.003588 34.501794 120.756279 bacterial
MNC.012.y 7.700512 7.700512 0.000000 2791.435491 0.000000 211.764072 3.850256 23.101535 38.502558 69.304605 ... 7.700512 53.903582 96.256396 42.352814 115.507675 0.000000 34.652303 300.319956 88.555885 viral
MNC.172 0.000000 31.886738 49.601593 2724.544640 0.000000 88.574273 0.000000 0.000000 0.000000 0.000000 ... 0.000000 74.402389 251.550936 81.488331 0.000000 0.000000 120.461011 336.582238 109.832099 control
MNC.091 0.000000 151.925177 0.000000 2791.625125 0.000000 294.355030 0.000000 0.000000 47.476618 170.915824 ... 0.000000 170.915824 28.485971 0.000000 161.420500 0.000000 588.710060 161.420500 265.869059 viral
MNC.115 0.000000 54.600268 37.050182 1961.709612 5.850029 140.400688 1.950010 0.000000 42.900210 3.900019 ... 0.000000 85.800420 185.250908 48.750239 27.300134 0.000000 19.500096 146.250717 87.750430 control
MNC.071 0.000000 0.000000 0.000000 384.221311 0.000000 0.000000 0.000000 0.000000 102.459016 315.915301 ... 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 614.754098 0.000000 viral
MN_305 0.000000 0.000000 190.882194 159.068495 174.975344 95.441097 63.627398 0.000000 0.000000 0.000000 ... 0.000000 238.602742 254.509592 0.000000 0.000000 0.000000 334.043839 270.416441 0.000000 fungal
MNC.294 0.000000 0.000000 0.000000 3896.217350 1782.126321 0.000000 0.000000 0.000000 0.000000 0.000000 ... 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 5084.301564 0.000000 control
MNC.111 0.000000 0.000000 0.000000 3644.501279 0.000000 237.486299 0.000000 0.000000 0.000000 0.000000 ... 0.000000 82.206796 118.743149 0.000000 374.497625 0.000000 365.363537 0.000000 137.011326 control
MNC.054 0.000000 8.651455 56.234454 2054.720450 0.000000 69.211636 34.605818 0.000000 0.000000 43.257273 ... 0.000000 190.332000 129.771818 0.000000 43.257273 0.000000 30.280091 103.817454 134.097545 control

43 rows × 41 columns